rm(list=ls()) # clean up workspace
library(ggplot2)
pairs <- c('YLR406C_YDL075W', 'YER131W_YGL189C', 'YML026C_YDR450W', 'YNL301C_YOL120C', 'YNL069C_YIL133C', 'YMR143W_YDL083C', 'YJL177W_YKL180W', 'YBR191W_YPL079W', 'YER074W_YIL069C', 'YDR418W_YEL054C', 'YBL087C_YER117W', 'YLR333C_YGR027C', 'YMR142C_YDL082W', 'YER102W_YBL072C')
## Now read individual results
ForceOmega.summary.path <- "/Users/xji3/GitFolders/Genconv/ForceOmega/Summary/"
summary_mat <- NULL
for (pair in pairs){
individual.file <- paste(ForceOmega.summary.path, paste("Force_MG94", pair, "nonclock_summary.txt", sep = "_"), sep = "")
if (file.exists(individual.file)){
all <- readLines(individual.file, n = -1)
col.names <- pair
row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
summary_mat <- cbind(summary_mat, as.matrix(read.table(individual.file,
row.names = row.names,
col.names = col.names)))
}
}
assign("ForceOmega_MG94_nonclock_summary", summary_mat)
## Now read sitewise results
for (pair in pairs){
sitewise.file <- paste(ForceOmega.summary.path, paste("Force_MG94", pair, "nonclock_sitewise_summary.txt", sep = "_"), sep = "")
summary_mat <- NULL
if (file.exists(sitewise.file)){
all <- readLines(sitewise.file, n = -1)
col.names <- strsplit(all[1], ' ')[[1]][-1]
row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
summary_mat <- cbind(summary_mat, as.matrix(read.table(sitewise.file,
row.names = row.names,
col.names = col.names)))
assign(paste(pair, "_ForceOmega_sitewise", sep = ""), summary_mat)
}
}
## Now read individual results
IGCexpansion.summary.path <- "/Users/xji3/GitFolders/Genconv/IGCexpansion/Summary/"
summary_mat <- NULL
for (pair in pairs){
individual.file <- paste(IGCexpansion.summary.path, paste("MG94", pair, "nonclock_summary.txt", sep = "_"), sep = "")
if (file.exists(individual.file)){
all <- readLines(individual.file, n = -1)
col.names <- pair
row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
summary_mat <- cbind(summary_mat, as.matrix(read.table(individual.file,
row.names = row.names,
col.names = col.names)))
}
}
assign("MG94_nonclock_summary", summary_mat)
## Now read sitewise results
for (pair in pairs){
sitewise.file <- paste(IGCexpansion.summary.path, paste("MG94", pair, "nonclock_sitewise_summary.txt", sep = "_"), sep = "")
summary_mat <- NULL
if (file.exists(sitewise.file)){
all <- readLines(sitewise.file, n = -1)
col.names <- strsplit(all[1], ' ')[[1]][-1]
row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
summary_mat <- cbind(summary_mat, as.matrix(read.table(sitewise.file,
row.names = row.names,
col.names = col.names)))
assign(paste(pair, "_sitewise", sep = ""), summary_mat)
}
}
Now prepare a function for generating plots
# define edge.list first
edge.list <- c("N0,N1", "N0,kluyveri", "N1,N2", "N1,castellii", "N2,N3", "N2,bayanus",
"N3,N4", "N3,kudriavzevii", "N4,N5", "N4,mikatae", "N5,cerevisiae", "N5,paradoxus")
# posterior.mat <- YBL087C_YER117W_ForceOmega_sitewise
# save.dir <- "/Users/xji3/GitFolders/Genconv/ForceOmega/Plots/"
# paralog <- "YBL087C_YER117W"
plot.sitewise.posterior <- function(posterior.mat, edge.list, save.dir, paralog){
for (edge in edge.list){
sitewise.mut <- posterior.mat[paste("(",edge, ",mut)", sep = ""), ]
sitewise.IGC.1to2 <- posterior.mat[paste("(",edge, ",1->2)", sep = ""), ]
sitewise.IGC.2to1 <- posterior.mat[paste("(",edge, ",2->1)", sep = ""), ]
branch.plot <- ggplot(mapping = aes(x = 1:length(sitewise.mut))) +
xlab("Codon Position") +
ylab("Posterior Expected number of Transitions") +
ggtitle(paste(paralog, edge, "branch posterior plot", sep = " ")) +
geom_line(aes(y = sitewise.mut, colour = "Mutation")) +
geom_line(aes(y = sitewise.IGC.2to1 + sitewise.IGC.1to2, colour = "IGC"))+
ggsave(paste(save.dir, gsub(",", "_",edge), " ", paralog, " branch posterior plot.jpg", sep = ""))
print(branch.plot)
}
all.branch.plot <- ggplot(mapping = aes(x = 1:length(sitewise.mut))) +
xlab("Codon Position") +
ylab("Posterior Expected number of Transitions") +
ggtitle(paste(paralog, "all branch posterior plot", sep = " ")) +
geom_line(aes(y = colSums(posterior.mat[1:12, ]), colour = "Mutation")) +
geom_line(aes(y = colSums(posterior.mat[13:36, ]), colour = "IGC"))+
ggsave(paste(save.dir, paralog, " all branch posterior plot.jpg", sep = ""))
print(all.branch.plot)
}
Now show the difference in % IGC from two inference, it tends to over-estimated percent of changes due to IGC when forcing omega = 1
difference.table <- cbind(colSums(MG94_nonclock_summary[c(34, 36:46, 48:57), ]) / colSums(MG94_nonclock_summary[c(34, 36:46, 48:58, 60:69), ]) ,
colSums(ForceOmega_MG94_nonclock_summary[c(34, 36:46, 48:57), ]) / colSums(ForceOmega_MG94_nonclock_summary[c(34, 36:46, 48:58, 60:69), ]),
colSums(MG94_nonclock_summary[c(34, 36:46, 48:57), ]) / colSums(MG94_nonclock_summary[c(34, 36:46, 48:58, 60:69), ]) -
colSums(ForceOmega_MG94_nonclock_summary[c(34, 36:46, 48:57), ]) / colSums(ForceOmega_MG94_nonclock_summary[c(34, 36:46, 48:58, 60:69), ]),
MG94_nonclock_summary["ll", ],
ForceOmega_MG94_nonclock_summary["ll", ],
MG94_nonclock_summary["ll", ] - ForceOmega_MG94_nonclock_summary["ll", ])
colnames(difference.table) <- c("Free Omega", "Omega = 1", "IGC% difference",
"lnL free omega", "lnL omega = 1", "lnL difference")
rownames(difference.table) <- colnames(MG94_nonclock_summary)
difference.table
## Free Omega Omega = 1 IGC% difference lnL free omega
## YLR406C_YDL075W 0.2032108 0.3341619 -0.13095106 -1178.099
## YER131W_YGL189C 0.2016578 0.3355255 -0.13386770 -1205.185
## YML026C_YDR450W 0.3358874 0.4370866 -0.10119913 -1377.245
## YNL301C_YOL120C 0.2606706 0.3866022 -0.12593161 -2139.308
## YNL069C_YIL133C 0.2159069 0.2950883 -0.07918134 -2322.829
## YMR143W_YDL083C 0.2925145 0.4063426 -0.11382805 -1209.752
## YJL177W_YKL180W 0.2125267 0.3676284 -0.15510175 -1837.058
## YBR191W_YPL079W 0.3158743 0.4080872 -0.09221285 -1467.294
## YER074W_YIL069C 0.3705008 0.4405249 -0.07002413 -1251.963
## YDR418W_YEL054C 0.2130676 0.3397901 -0.12672242 -1739.176
## YBL087C_YER117W 0.2902311 0.4260309 -0.13579978 -1367.679
## YLR333C_YGR027C 0.2892704 0.3776044 -0.08833398 -1261.999
## YMR142C_YDL082W 0.3801116 0.4313587 -0.05124707 -2054.051
## YER102W_YBL072C 0.3571439 0.4191109 -0.06196706 -2058.956
## lnL omega = 1 lnL difference
## YLR406C_YDL075W -1254.447 76.34806
## YER131W_YGL189C -1303.894 98.70933
## YML026C_YDR450W -1447.788 70.54299
## YNL301C_YOL120C -2234.125 94.81757
## YNL069C_YIL133C -2414.719 91.89053
## YMR143W_YDL083C -1317.605 107.85339
## YJL177W_YKL180W -1938.506 101.44747
## YBR191W_YPL079W -1535.235 67.94056
## YER074W_YIL069C -1296.796 44.83228
## YDR418W_YEL054C -1860.419 121.24238
## YBL087C_YER117W -1466.701 99.02205
## YLR333C_YGR027C -1314.778 52.77873
## YMR142C_YDL082W -2096.155 42.10417
## YER102W_YBL072C -2106.727 47.77088
Now plot IGC posterior expected number of transitions
for (pair in pairs){
save.dir <- paste("/Users/xji3/GitFolders/Genconv/IGCexpansion/Plots/", pair, "/", sep = "")
dir.create(save.dir, showWarnings = FALSE)
posterior.mat <- get(paste(pair, "_sitewise", sep = ""))
plot.sitewise.posterior(posterior.mat, edge.list, save.dir, pair)
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image